import pandas as pd
import numpy as np
import pickle

# Load the ATP match table from the CSV in the working directory.
atp_tennis = pd.read_csv('atp_tennis.csv')

# Keep every match in which Federer is listed in either player column.
federer_as_p1 = atp_tennis['Player_1'] == 'Federer R.'
federer_as_p2 = atp_tennis['Player_2'] == 'Federer R.'
federer_matches = atp_tennis[federer_as_p1 | federer_as_p2]

# For each of those matches pick the name that is not Federer: take Player_1,
# but substitute Player_2 wherever Player_1 is Federer himself. Then reduce
# the result to its distinct values.
federer_opponents = (
    federer_matches['Player_1']
    .mask(federer_matches['Player_1'] == 'Federer R.', federer_matches['Player_2'])
    .unique()
)

# Distinct values of the Surface column across the whole dataset.
surface_types = atp_tennis['Surface'].unique()

def calculate_performance(player, opponent, data, surfaces=None):
    """Summarise how ``player`` fared in matches against ``opponent``.

    Only rows of ``data`` where the two names occupy the ``Player_1`` and
    ``Player_2`` columns (in either order) are considered. A match counts
    as a win when its ``Winner`` column equals ``player``.

    Args:
        player: Name of the player whose results are measured.
        opponent: Name of the other player.
        data: DataFrame with ``Player_1``, ``Player_2``, ``Winner`` and
            ``Surface`` columns.
        surfaces: Optional iterable of surface values to report on. When
            omitted, the module-level ``surface_types`` is used, which is
            what this function always did before the parameter existed.

    Returns:
        A tuple ``(win_rate, h2h_record, surface_performance)``:

        * ``win_rate`` -- wins divided by matches between the two players,
          or ``0`` when they never met.
        * ``h2h_record`` -- the same value as ``win_rate``; both are
          returned so the existing three-element return shape is kept.
        * ``surface_performance`` -- dict mapping each surface to the
          win fraction on that surface, or ``0`` when no match between
          the two players was played on it.
    """
    if surfaces is None:
        # Backward-compatible default: fall back to the module-level list.
        surfaces = surface_types

    # Rows where the two players met, regardless of which column holds whom.
    listed_first = (data['Player_1'] == player) & (data['Player_2'] == opponent)
    listed_second = (data['Player_1'] == opponent) & (data['Player_2'] == player)
    meetings = data[listed_first | listed_second]

    # Boolean mask over the meetings: True where `player` won.
    won = meetings['Winner'] == player

    total_matches = len(meetings)
    wins = int(won.sum())
    win_rate = wins / total_matches if total_matches > 0 else 0

    # Restricted to the pairing, head-to-head is the same ratio as win rate.
    h2h_record = win_rate

    surface_performance = {}
    for surface in surfaces:
        on_surface = meetings['Surface'] == surface
        played = int(on_surface.sum())
        surface_wins = int((on_surface & won).sum())
        # Guard against division by zero for surfaces the pair never met on.
        surface_performance[surface] = surface_wins / played if played > 0 else 0

    return win_rate, h2h_record, surface_performance

# Column layout of the result table: fixed identification/summary columns
# followed by one performance column per surface type.
columns = ['Player', 'Opponent', 'Win Rate', 'Head-to-Head'] + [f'{surface} Surface Performance' for surface in surface_types]

# Collect one plain dict per opponent and build the dataframe once at the end.
# Concatenating onto a growing dataframe inside the loop copies every earlier
# row on each iteration and relies on how pandas treats the initial empty
# frame when picking result dtypes.
rows = []

# Loop through Federer's opponents
for opponent in federer_opponents:
    # Calculate win rate, head-to-head record, and surface performance for Raonic M. against each opponent
    win_rate, h2h_record, surface_performance = calculate_performance('Raonic M.', opponent, atp_tennis)

    # Assemble this opponent's row, keyed by the column names defined above
    row_data = {'Player': 'Raonic M.', 'Opponent': opponent, 'Win Rate': win_rate, 'Head-to-Head': h2h_record}
    row_data.update({f'{surface} Surface Performance': surface_performance[surface] for surface in surface_types})
    rows.append(row_data)

# Passing `columns` fixes the column order and still yields an empty,
# correctly-labelled dataframe when there are no opponents.
player_stats = pd.DataFrame(rows, columns=columns)

# Display the resulting dataframe
print(player_stats)

# Persist the result; the context manager guarantees the file is flushed and
# closed even if pickling raises.
with open("./ref_result/player_stats.pkl", "wb") as output_file:
    pickle.dump(player_stats, output_file)